To run this code in my project using the renv environment, run the following lines of code
install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile
require("ViSEAGO")
## Loading required package: ViSEAGO
##
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: SparseM
## Warning: package 'SparseM' was built under R version 4.3.3
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
require("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate() masks ViSEAGO::annotate()
## ✖ stringr::boundary() masks graph::boundary()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [4] dplyr_1.1.4 purrr_1.0.2 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
## [10] tidyverse_2.0.0 topGO_2.54.0 SparseM_1.84-2
## [13] GO.db_3.18.0 AnnotationDbi_1.64.1 IRanges_2.34.1
## [16] S4Vectors_0.38.2 Biobase_2.60.0 graph_1.80.0
## [19] BiocGenerics_0.46.0 ViSEAGO_1.16.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.0 jsonlite_1.8.9
## [4] magrittr_2.0.3 rmarkdown_2.28 fs_1.6.4
## [7] zlibbioc_1.46.0 vctrs_0.6.5 memoise_2.0.1
## [10] RCurl_1.98-1.16 webshot_0.5.5 htmltools_0.5.8.1
## [13] progress_1.2.3 dynamicTreeCut_1.63-1 curl_5.2.3
## [16] sass_0.4.9 bslib_0.8.0 htmlwidgets_1.6.4
## [19] plyr_1.8.9 plotly_4.10.4 cachem_1.1.0
## [22] igraph_2.1.2 lifecycle_1.0.4 iterators_1.0.14
## [25] pkgconfig_2.0.3 Matrix_1.6-5 R6_2.5.1
## [28] fastmap_1.2.0 GenomeInfoDbData_1.2.10 digest_0.6.37
## [31] colorspace_2.1-1 RSQLite_2.3.9 seriation_1.5.7
## [34] filelock_1.0.3 timechange_0.3.0 fansi_1.0.6
## [37] httr_1.4.7 compiler_4.3.2 bit64_4.5.2
## [40] withr_3.0.1 BiocParallel_1.34.2 viridis_0.6.5
## [43] DBI_1.2.3 UpSetR_1.4.0 heatmaply_1.5.0
## [46] dendextend_1.19.0 R.utils_2.12.3 biomaRt_2.58.2
## [49] rappdirs_0.3.3 tools_4.3.2 R.oo_1.27.0
## [52] glue_1.8.0 DiagrammeR_1.0.11 GOSemSim_2.28.1
## [55] grid_4.3.2 fgsea_1.28.0 generics_0.1.3
## [58] gtable_0.3.5 tzdb_0.4.0 R.methodsS3_1.8.2
## [61] ca_0.71.1 data.table_1.16.2 hms_1.1.3
## [64] xml2_1.3.6 utf8_1.2.4 XVector_0.40.0
## [67] foreach_1.5.2 pillar_1.9.0 yulab.utils_0.1.9
## [70] BiocFileCache_2.10.2 lattice_0.22-6 renv_1.0.11
## [73] bit_4.5.0 tidyselect_1.2.1 registry_0.5-1
## [76] Biostrings_2.70.3 knitr_1.48 gridExtra_2.3
## [79] xfun_0.48 matrixStats_1.4.1 DT_0.33
## [82] visNetwork_2.1.2 stringi_1.8.4 lazyeval_0.2.2
## [85] yaml_2.3.10 evaluate_1.0.1 codetools_0.2-20
## [88] BiocManager_1.30.25 cli_3.6.3 munsell_0.5.1
## [91] jquerylib_0.1.4 Rcpp_1.0.13-1 GenomeInfoDb_1.36.4
## [94] dbplyr_2.5.0 png_0.1-8 XML_3.99-0.17
## [97] parallel_4.3.2 assertthat_0.2.1 blob_1.2.4
## [100] prettyunits_1.2.0 AnnotationForge_1.44.0 bitops_1.0-9
## [103] viridisLite_0.4.2 scales_1.3.0 crayon_1.5.3
## [106] rlang_1.1.4 cowplot_1.1.3 fastmatch_1.1-6
## [109] KEGGREST_1.40.1 TSP_1.2-4
I am going to perform functional enrichment of GO terms using ViSEAGO.
I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.
In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Aboral tissue, and positive LFC = higher in Oral tissue.
#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")
#make dataframes of just differentially expressed genes for each LFC direction
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 2)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < -2)
#load in annotation data
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)
#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query)
annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))
nrow(annot_tab)
## [1] 10638
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7354812
10638/14464 genes in our dataset have GO information in this file. That is 74%.
sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 278
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6797066
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 546
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6275862
545/804 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.
1979/2802 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.
##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs) %>% dplyr::rename("GO.terms" = GOs)
# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
# Separate GO terms into individual rows
separate_rows(GO.terms, sep = ";") %>%
# Add necessary columns
mutate(
taxid = "pacuta",
gene_symbol = query,
evidence = "SwissProt"
) %>%
# Rename columns
dplyr::rename(
gene_id = query,
GOID = GO.terms
) %>%
dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)
Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))
write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)
length(unique(Custom_GOs$gene_id))
## [1] 10638
length(unique(Custom_GOs_valid$gene_id))
## [1] 10637
We seem to have lost one gene when filtering for valid GO terms, so I need to aMFount for that below.
Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
id="pacuta",
Custom_Pacuta
)
selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>%
mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
mutate(expressed = 1)
selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)
selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)
expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)
# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)
BP_Oral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8920 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12838 GO terms and 28523 relations. )
##
## Annotating nodes ...............
## ( 9534 genes annotated to the GO terms. )
# perform TopGO test using classic algorithm
classic_Oral <- topGO::runTest(
BP_Oral,
algorithm ="classic",
statistic = "fisher"
)
##
## -- Classic Algorithm --
##
## the algorithm is scoring 3938 nontrivial nodes
## parameters:
## test statistic: fisher
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Oral = c("BP_Oral", "classic_Oral"))
)
## 'select()' returned 1:1 mapping between keys and columns
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Oral
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 546
## feasible_genes: 9534
## feasible_genes_significant: 472
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## classic_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher
## algorithm_name: classic
## GO_scored: 6852
## GO_significant: 126
## feasible_genes: 9534
## feasible_genes_significant: 472
## genes_nodeSize: 5
## Nontrivial_nodes: 3938
## - enrichment pvalue cutoff:
## Oral : 0.01
## - enrich GOs (in at least one list): 126 GO terms of 1 conditions.
## Oral : 126 terms
# display the merged table
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_Fisher.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
# compute all available Semantic Similarity (SS) measures
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance="Wang"
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 546
## feasible_genes: 9534
## feasible_genes_significant: 472
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## classic_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher
## algorithm_name: classic
## GO_scored: 6852
## GO_significant: 126
## feasible_genes: 9534
## feasible_genes_significant: 472
## genes_nodeSize: 5
## Nontrivial_nodes: 3938
## - enrichment pvalue cutoff:
## Oral : 0.01
## - enrich GOs (in at least one list): 126 GO terms of 1 conditions.
## Oral : 126 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2<-ViSEAGO::GOterms_heatmap(
myGOs,
showIC=TRUE,
showGOlabels=TRUE,
GO.tree=list(
tree=list(
distance="Wang",
aggreg.method="ward.D2"
),
cut=list(
dynamic=list(
pamStage=TRUE,
pamRespectsDendro=TRUE,
deepSplit=2,
minClusterSize =2
)
)
),
samples.tree=NULL
)
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms"
)
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOterms")
# calculate semantic similarites between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# build and highlight in an interactive MDSplot grouped clusters for one distance object
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOclusters")
# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)
BP_Aboral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8920 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12838 GO terms and 28523 relations. )
##
## Annotating nodes ...............
## ( 9534 genes annotated to the GO terms. )
# perform TopGO test using classic algorithm
classic_Aboral <- topGO::runTest(
BP_Aboral,
algorithm ="classic",
statistic = "fisher"
)
##
## -- Classic Algorithm --
##
## the algorithm is scoring 2859 nontrivial nodes
## parameters:
## test statistic: fisher
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Aboral = c("BP_Aboral", "classic_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Aboral
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 278
## feasible_genes: 9534
## feasible_genes_significant: 243
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## classic_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher
## algorithm_name: classic
## GO_scored: 6852
## GO_significant: 336
## feasible_genes: 9534
## feasible_genes_significant: 243
## genes_nodeSize: 5
## Nontrivial_nodes: 2859
## - enrichment pvalue cutoff:
## Aboral : 0.01
## - enrich GOs (in at least one list): 336 GO terms of 1 conditions.
## Aboral : 336 terms
# display the merged table
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_Fisher.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
# compute all available Semantic Similarity (SS) measures
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance="Wang"
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 278
## feasible_genes: 9534
## feasible_genes_significant: 243
## genes_nodeSize: 5
## nodes_number: 6852
## edges_number: 14732
## classic_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher
## algorithm_name: classic
## GO_scored: 6852
## GO_significant: 336
## feasible_genes: 9534
## feasible_genes_significant: 243
## genes_nodeSize: 5
## Nontrivial_nodes: 2859
## - enrichment pvalue cutoff:
## Aboral : 0.01
## - enrich GOs (in at least one list): 336 GO terms of 1 conditions.
## Aboral : 336 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2<-ViSEAGO::GOterms_heatmap(
myGOs,
showIC=TRUE,
showGOlabels=TRUE,
GO.tree=list(
tree=list(
distance="Wang",
aggreg.method="ward.D2"
),
cut=list(
dynamic=list(
pamStage=TRUE,
pamRespectsDendro=TRUE,
deepSplit=2,
minClusterSize =2
)
)
),
samples.tree=NULL
)
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms"
)
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOterms")
# calculate semantic similarites between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# build and highlight in an interactive MDSplot grouped clusters for one distance object
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOclusters")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOclusters")
library(clusterProfiler)
## Warning: package 'clusterProfiler' was built under R version 4.3.3
## clusterProfiler v4.10.1 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
##
## simplify
## The following object is masked from 'package:AnnotationDbi':
##
## select
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
library(enrichplot)
library(GO.db)
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 1)
# Create TERM2GENE data.frame
TERM2GENE <- Custom_GOs_valid %>%
dplyr::select(GOID, gene_id) %>%
dplyr::distinct() # remove potential duplicates
gene_list_aboral <- DE_05_Aboral$query
ego_aboral <- enricher(
gene = gene_list_aboral,
TERM2GENE = TERM2GENE,
pvalueCutoff = 0.05,
qvalueCutoff = 0.05
)
ego_aboral@result$Description <- Term(ego_aboral@result$ID)
dotplot(ego_aboral, showCategory = 20)
barplot(ego_aboral, showCategory = 20, title = "GO Barplot (Aboral)")
# Clean ego_aboral@result$geneID
ego_aboral@result$geneID <- gsub("Pocillopora_acuta_HIv2___", "", ego_aboral@result$geneID)
ego_aboral@result$geneID <- gsub("\\.t1[a-z]*", "", ego_aboral@result$geneID)
treeplot(pairwise_termsim(ego_aboral))
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
########
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < 0)
gene_list_oral <- DE_05_OralEpi$query
ego_oral <- enricher(
gene = gene_list_oral,
pAdjustMethod="none",
TERM2GENE = TERM2GENE,
pvalueCutoff = 0.05,
qvalueCutoff = 1
)
ego_oral
## #
## # over-representation test
## #
## #...@organism UNKNOWN
## #...@ontology UNKNOWN
## #...@gene chr [1:2802] "Pocillopora_acuta_HIv2___RNAseq.g6351.t1" ...
## #...pvalues adjusted by 'none' with cutoff <0.05
## #...108 enriched terms found
## 'data.frame': 108 obs. of 9 variables:
## $ ID : chr "GO:1902495" "GO:0045211" "GO:0008593" "GO:1902476" ...
## $ Description: chr "GO:1902495" "GO:0045211" "GO:0008593" "GO:1902476" ...
## $ GeneRatio : chr "11/1978" "44/1978" "11/1978" "15/1978" ...
## $ BgRatio : chr "14/10637" "123/10637" "17/10637" "32/10637" ...
## $ pvalue : num 1.87e-06 4.57e-06 3.66e-05 2.40e-04 2.63e-04 ...
## $ p.adjust : num 1.87e-06 4.57e-06 3.66e-05 2.40e-04 2.63e-04 ...
## $ qvalue : num 0.00423 0.00515 0.02753 0.10955 0.10955 ...
## $ geneID : chr "Pocillopora_acuta_HIv2___RNAseq.g28741.t1/Pocillopora_acuta_HIv2___RNAseq.g21572.t1a/Pocillopora_acuta_HIv2___R"| __truncated__ "Pocillopora_acuta_HIv2___RNAseq.g5252.t1/Pocillopora_acuta_HIv2___RNAseq.g28202.t1/Pocillopora_acuta_HIv2___RNA"| __truncated__ "Pocillopora_acuta_HIv2___RNAseq.g7241.t1/Pocillopora_acuta_HIv2___RNAseq.g7243.t1/Pocillopora_acuta_HIv2___RNAs"| __truncated__ "Pocillopora_acuta_HIv2___RNAseq.g19042.t1/Pocillopora_acuta_HIv2___RNAseq.g14871.t1/Pocillopora_acuta_HIv2___RN"| __truncated__ ...
## $ Count : int 11 44 11 15 14 19 32 17 8 8 ...
## #...Citation
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu.
## clusterProfiler 4.0: A universal enrichment tool for interpreting omics data.
## The Innovation. 2021, 2(3):100141
ego_oral@result$Description <- Term(ego_oral@result$ID)
upsetplot(ego_oral)
dotplot(ego_oral, showCategory = 20)
barplot(ego_oral, showCategory = 20, title = "GO Barplot (oral)")
# Clean ego_oral@result$geneID
ego_oral@result$geneID <- gsub("Pocillopora_acuta_HIv2___", "", ego_oral@result$geneID)
ego_oral@result$geneID <- gsub("\\.t1[a-z]*", "", ego_oral@result$geneID)
treeplot(pairwise_termsim(ego_oral), hclust_method = "ward.D2",nwords=2)
## Warning in treeplot.enrichResult(x, ...): Use 'cluster.params = list(method = your_value)' instead of 'hclust_method'.
## The hclust_method parameter will be removed in the next version.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.